# _*_ coding: utf-8 _*_
# @Date : 2023/3/16 14:16
# @Author : Paul
# @File : preview_data.py
# @Description : Preview a table (sample rows, DataFrame info/describe) and save the result via LogUtil
from core.utils.log_util import LogUtil

from core.beans.preview_data_result import PreviewDataResult
from core.data_source.meta_data_source.meta_data_source import MetaDataSource

from core.utils.data_souce_init_utils import DataSourceInitUtil

from core.utils.date_util import DateUtil
import pandas as pd
import io
import json
import sys


class PreviewData:
    """Build a preview of one table and persist it through ``LogUtil``.

    The preview consists of the ``DataFrame.info`` text, the
    ``DataFrame.describe`` statistics and the sampled rows serialized as JSON.
    """

    def __init__(self,
                 app_name="clusters",
                 param=None,
                 data_source_id=None,
                 table_name=None,
                 train_cols=None):
        """
        Store the request settings and resolve the data sources.

        :param app_name: application name recorded with the preview result
        :param param: request parameters; ``param["id"]`` is read by
            ``getDataDetail`` and the whole object is stored with the result
        :param data_source_id: id handed to ``DataSourceInitUtil.getDataBase``
        :param table_name: table the preview rows are selected from
        :param train_cols: column labels applied to the preview DataFrame
        """
        # Start time of the whole preview job (includes data source setup).
        self.start_time = DateUtil.getCurrentDate()
        self.app_name = app_name
        self.param = param
        self.data_source_id = data_source_id
        self.table_name = table_name
        self.train_cols = train_cols
        # Filled in by getDataDetail(); declared here so the attributes
        # always exist on an instance.
        self.info = None
        self.describe = None
        # Metadata store (also used to save the preview result).
        self.meta_data_source = MetaDataSource()
        # Data source that holds the table to preview.
        self.data_source = DataSourceInitUtil.getDataBase(self.meta_data_source,
                                                          data_source_id)

    def getDataDetail(self):
        """
        Sample up to 50 rows of ``table_name``, summarize them and save the result.

        Sets ``self.info`` (text written by ``DataFrame.info``) and
        ``self.describe`` (result of ``DataFrame.describe``), then hands a
        ``PreviewDataResult`` to ``LogUtil.savePreViewDataResult``.

        :return: None
        :raises TypeError: if ``param`` is None (it cannot be subscripted)
        :raises KeyError: if ``param`` is a mapping without an ``"id"`` key
        """
        # Resolve the result id before touching the database so that a missing
        # or malformed ``param`` fails immediately instead of after two queries.
        result_id = self.param["id"]

        # NOTE(review): table_name is interpolated into the SQL text as-is;
        # it must come from a trusted source - confirm against the caller.
        data_query_sql = "select * from {} limit 50".format(self.table_name)
        rows = self.data_source.queryAll(data_query_sql)
        train_data = pd.DataFrame(data=rows,
                                  columns=self.train_cols)

        # Brief summary: DataFrame.info writes to a stream, so capture it
        # in an in-memory text buffer and keep the resulting string.
        buf = io.StringIO()
        train_data.info(buf=buf)
        self.info = buf.getvalue()

        # Descriptive statistics.
        self.describe = train_data.describe()

        # Same query again, this time through queryOrderedDict, to get the
        # rows that are serialized into the stored result.
        data_dict = self.data_source.queryOrderedDict(data_query_sql)

        # End time and elapsed time.
        # NOTE(review): the helper is named diffMin while the value is stored
        # as cost_second - confirm which unit DateUtil.diffMin returns.
        end_time = DateUtil.getCurrentDate()
        cost_second = DateUtil.diffMin(self.start_time, end_time)

        # default=str: values json cannot encode natively (e.g. datetime or
        # Decimal, if the query layer returns them) are stringified instead
        # of aborting the whole preview with a TypeError.
        # Double quotes are backslash-escaped afterwards - presumably because
        # the saver embeds this text in a quoted SQL string; confirm.
        data_json = json.dumps(data_dict, default=str).replace("\"", "\\\"")

        # Persist the preview result through the metadata store.
        preview_result = PreviewDataResult(result_id,
                                           "preview_data",
                                           self.param,
                                           self.app_name,
                                           self.info,
                                           self.describe,
                                           self.train_cols,
                                           data_json,
                                           self.start_time,
                                           end_time,
                                           cost_second)
        LogUtil.savePreViewDataResult(self.meta_data_source, preview_result)

if __name__ == '__main__':
    # Script entry point. The single command-line argument is a JSON object
    # with the keys "id", "appName", "dataSourceId", "tableName", "trainCols"
    # ("id" is read later by PreviewData.getDataDetail).
    if len(sys.argv) < 2:
        # Exit with a readable message (status 1) instead of an IndexError
        # traceback when the argument is missing.
        sys.exit("usage: {} '<json parameters>'".format(sys.argv[0]))
    param = json.loads(sys.argv[1])
    preview = PreviewData(app_name=param["appName"],
                          param=param,
                          data_source_id=param["dataSourceId"],
                          table_name=param["tableName"],
                          train_cols=param["trainCols"])
    preview.getDataDetail()
